/*==============================================================================
FILE NAME: Figure_A5.do
CREATED: 21 July 2025
==============================================================================*/

**Figure A5

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures"  // Define global paths for replication package
} 
*/

//county-by-month fe
use "$processed_data/Air_Panel.dta", clear
drop if never_air_inv == 1

// Create panel time variable 
egen t = group(year month)
xtset RN_id t

// Create variable for post-incident outcomes
forv h = 0/12 {
	gen p_air_inv_`h' = f`h'.p_air_inv - l1.p_air_inv
}

// Create variable for pre-incident placebo
forv h = 2/12 {
	gen p_air_inv_neg`h' = l`h'.p_air_inv - l1.p_air_inv
}

egen RN_year = group(RN year)



cap drop b u d se Years Zero
gen Years = _n-13 if _n<=25
gen Zero = 0 if _n <=25
gen b = 0
gen se = 0
gen u = 0 
gen d = 0
egen county_id=group(county)
egen county_month=group(county t)

// Regressions for post-incident outcomes
foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_inv_`h'  p_air_incident, absorb(RN_year county_month) cluster(RN_id )
unique t if e(sample)
replace b = _b[p_air_incident] if Years == `h'
replace se = _se[p_air_incident] if Years == `h'
replace u = (_b[p_air_incident] + 1.96*_se[p_air_incident]) if Years == `h'
replace d = (_b[p_air_incident] - 1.96*_se[p_air_incident]) if Years == `h'
}

// Regressions for pre-incident placebo
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
reghdfe p_air_inv_neg`h'  p_air_incident, absorb(RN_year county_month) cluster(RN_id)
unique t if e(sample)
replace b = _b[p_air_incident] if Years == -`h'
replace se = _se[p_air_incident] if Years == -`h'
replace u = (_b[p_air_incident] + 1.96*_se[p_air_incident]) if Years == -`h'
replace d = (_b[p_air_incident] - 1.96*_se[p_air_incident]) if Years == -`h'
}
preserve
keep if Years != .
keep b u d se Years Zero
export delimited "$point_estimates/Point_Estimates_Figure_A5_Panel_A.csv", replace
graph set window fontface "Times New Roman"
twoway(rarea u d Years, col(navy) fint(inten20) lwidth(0) lpattern(solid))(line b Years, lcolor(navy) lpattern(solid) lwidth(medium))(line Zero Years, lcolor(black)), xlabel(-12(1)12, nogrid labsize(vlarge)) ylabel(0(0.1)0.6, labsize(vlarge)) legend(off) ytitle("{&Delta} P(Investigation)", size(vlarge)) ylabel(,labsize(vlarge)) xtitle("Month", size(vlarge)) graphregion(color(white)) plotregion(color(white)) xsize(8.6)
// Save figure
graph export "$figures/Figure_A5_Panel_A.pdf", replace

restore

// Panel B
// Load facility characteristics and create SIC code indicators
use "$processed_data/facility_characteristics.dta", clear
keep RegulatedEntityNo SIC
drop if SIC==""                                 
gen dig2_SIC = substr(SIC, 1, 2)                
destring dig2_SIC, replace
tab dig2_SIC                                    
drop SIC
duplicates drop                                 

// Generate numeric RN_id from RegulatedEntityNo
gen RN_id = substr(RegulatedEntityNo, 3, .)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace
drop RegulatedEntityNo
sort RN_id dig2_SIC

// Reshape to wide format to detect multiple SIC codes per RN
by RN_id: gen num = _n
reshape wide dig2_SIC, i(RN_id) j(num)

// Flag facilities with multiple SICs
gen multiple_SIC = 0
replace multiple_SIC = 1 if dig2_SIC2 ~= .
replace dig2_SIC1 = 100 if multiple_SIC == 1    

sort RN_id
save "$processed_data/temp.dta", replace

// Prepare panel data and generate treatment dummies

// Drop facilities that never had an air investigation
use "$processed_data/Air_Panel.dta", clear
drop if never_air_inv == 1                      

// Generate unique month-year identifier
egen t = group(year month)                      

// Generate variables for post-incident outcomes
forv h = 0/12 {
	gen p_air_inv_`h' = f`h'.p_air_inv - l1.p_air_inv
}

// Generate variables for pre-incident horizons
forv h = 2/12 {
	gen p_air_inv_neg`h' = l`h'.p_air_inv - l1.p_air_inv
}

egen RN_year = group(RN year)                   

// Merge SIC codes (industry info) and prepare fixed effects

sort RN_id
merge m:1 RN_id using "$processed_data/temp.dta"    
unique RN_id if _merge == 3                         
keep if _merge == 3
unique RN_id if multiple_SIC == 1                   
drop dig2_SIC2-dig2_SIC12                           
unique RN_id if dig2_SIC1 ~= .                      
tab dig2_SIC1                                       

// Initialize storage variables
cap drop b u d se Years Zero
gen Years = _n - 13 if _n <= 25                   
gen Zero = 0 if _n <= 25
gen b = 0
gen se = 0
gen u = 0
gen d = 0

// Generate higher-order fixed effects
egen county_id = group(county)
egen county_month = group(county t)              
egen industry_month = group(dig2_SIC1 t)

// Run event study regressions

// Regressions for post-incident outcomes (0 to 12)
foreach h in 0 1 2 3 4 5 6 7 8 9 10 11 12 {
    reghdfe p_air_inv_`h' p_air_incident, ///
        absorb(RN_year industry_month county_month) ///
        cluster(RN_id)
    replace b = _b[p_air_incident] if Years == `h'
    replace se = _se[p_air_incident] if Years == `h'
    replace u = _b[p_air_incident] + 1.96 * _se[p_air_incident] if Years == `h'
    replace d = _b[p_air_incident] - 1.96 * _se[p_air_incident] if Years == `h'
}

// Regressionf for pre-incident outcomes
foreach h in 2 3 4 5 6 7 8 9 10 11 12 {
    reghdfe p_air_inv_neg`h' p_air_incident, ///
        absorb(RN_year industry_month county_month) ///
        cluster(RN_id)
    replace b = _b[p_air_incident] if Years == -`h'
    replace se = _se[p_air_incident] if Years == -`h'
    replace u = _b[p_air_incident] + 1.96 * _se[p_air_incident] if Years == -`h'
    replace d = _b[p_air_incident] - 1.96 * _se[p_air_incident] if Years == -`h'
}


// Export and Plot
preserve
keep if Years != .
keep b u d se Years Zero

// Export point estimates for reproducibility / publication
export delimited "$point_estimates/Point_Estimates_Figure_A5_Panel_B.csv", replace
graph set window fontface "Times New Roman"
twoway ///
    (rarea u d Years, col(navy) fint(inten20) lwidth(0) lpattern(solid)) ///
    (line b Years, lcolor(navy) lpattern(solid) lwidth(medium)) ///
    (line Zero Years, lcolor(black)), ///
    xlabel(-12(1)12, nogrid labsize(vlarge)) ///
    ylabel(0(0.1)0.6, labsize(vlarge)) ///
    legend(off) ///
    ytitle("{&Delta} P(Investigation)", size(vlarge)) ///
    ylabel(, labsize(vlarge)) ///
    xtitle("Month", size(vlarge)) ///
    graphregion(color(white)) ///
    plotregion(color(white)) ///
    xsize(8.6)

// Save figure
graph export "$figures/Figure_A5_Panel_B.pdf", replace
restore